#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import csv
import os
import shutil
import zipfile
from os.path import join, getsize
def unzip_file(zip_src, dst_dir):
    """Extract every member of the zip archive ``zip_src`` into ``dst_dir``.

    Args:
        zip_src: Path to the candidate zip archive.
        dst_dir: Directory that receives the extracted members.

    Returns:
        None. When ``zip_src`` is not recognised as a zip archive, the
        message ``This is not zip`` is printed and nothing is extracted.
    """
    if not zipfile.is_zipfile(zip_src):
        print('This is not zip')
        return
    # The context manager closes the archive handle even when extraction
    # raises; the previous version never closed it.
    with zipfile.ZipFile(zip_src, 'r') as fz:
        fz.extractall(dst_dir)
# unzip_file (above): helper that extracts a zip archive.
import re
import collections
import jieba.posseg as pseg
import jieba
def deal(txt_name):
    """Count token frequencies in a GBK text file and append them to a CSV.

    The text is reduced to two character streams: lowercase ASCII letters,
    commas and spaces (taken from the lower-cased text), followed by all
    characters in the CJK range U+4E00..U+9FA5. The concatenation of both
    streams is written to ``Desktop/分割文件.txt`` (overwritten on every call),
    split on whitespace, and each distinct token is appended with its count
    as a ``token,count`` row to ``Desktop/词频统计1.txt.csv``.

    Both output paths are relative to the current working directory, and the
    ``Desktop`` directory must already exist.

    Args:
        txt_name: Path of the GBK-encoded text file to analyse.

    Returns:
        None.

    Raises:
        OSError: If the input cannot be opened or an output cannot be written.
        UnicodeDecodeError: If the input is not valid GBK.
    """
    # Read the file that was actually requested; the previous version opened
    # the literal path "txt_name.txt" and never closed the handle.
    with open(txt_name, encoding="GBK") as src:
        text = src.read()

    # The patterns get their own names: the previous version rebound ``cn``
    # from the compiled pattern to a string and then called ``cn.findall``,
    # which raised AttributeError on every call.
    latin_pattern = re.compile(r'[\u0061-\u007a,\u0020]')
    cjk_pattern = re.compile(r'[\u4e00-\u9fa5]')
    en = "".join(latin_pattern.findall(text.lower()))
    cn = "".join(cjk_pattern.findall(text))

    # Keep writing the intermediate "split" file that earlier runs produced.
    with open(r'Desktop/分割文件.txt', 'w', encoding="GBK") as f:
        f.write(en)
        f.write(cn)

    # Neither stream can contain a newline, so splitting the in-memory string
    # yields exactly what re-reading the intermediate file would.
    # NOTE(review): the previous version also created a jieba ``pseg.cut``
    # generator over the Chinese text but never consumed it, so the Chinese
    # characters are counted as one unsegmented run (glued to the last ASCII
    # token when no space separates them). Confirm whether jieba word
    # segmentation was intended here.
    words_dic = collections.Counter((en + cn).split())

    # GBK matches the other files handled here instead of relying on the
    # platform default encoding. The file is opened without newline='' and
    # the writer uses "\n" so the text layer keeps translating line endings
    # as before; csv.writer quotes tokens that contain a comma (the ASCII
    # pattern keeps commas), which previously produced malformed rows.
    with open("Desktop/词频统计1.txt" + '.csv', 'a', encoding="GBK") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerows(words_dic.items())
# Raw strings keep every backslash literal. In the previous non-raw literals
# the "\b" of "\books" was parsed as a backspace character, and "\Z" / "\D"
# are invalid escape sequences.
DESKTOP_DIR = r'C:\Users\Zehui Liu\Desktop'
BOOKS_DIR = os.path.join(DESKTOP_DIR, 'books')

# NOTE(review): the first argument is the Desktop directory itself, not a
# .zip file, so unzip_file is expected to print 'This is not zip' and extract
# nothing. Confirm the intended archive path.
unzip_file(DESKTOP_DIR, BOOKS_DIR)

# os.listdir yields bare file names; join them with the directory so deal()
# receives a path that resolves regardless of the working directory.
for filename in os.listdir(BOOKS_DIR):
    deal(os.path.join(BOOKS_DIR, filename))

